This vignette shows how to use Signac with Seurat. There are three parts: Seurat, Signac and then visualization. We use an example CITE-seq data set from 10X Genomics.

Seurat

Start with the standard pre-processing steps for a Seurat object.

library(Seurat)

Download data from 10X Genomics.

download.file("https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_protein_v3/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5", destfile = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")

Create a Seurat object, and then perform SCTransform normalization. Note:

# load dataset
E = Read10X_h5(filename = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
pbmc <- CreateSeuratObject(counts = E$`Gene Expression`, project = "pbmc")

# store mitochondrial percentage in object meta data
pbmc <- PercentageFeatureSet(pbmc, pattern = "^MT-", col.name = "percent.mt")

# run sctransform
pbmc <- SCTransform(pbmc,vars.to.regress = "percent.mt", verbose = FALSE)

Perform dimensionality reduction by PCA and UMAP embedding. Note: Signac actually needs these commands to be run, since it uses the nearest neighbor graph.

# These are now standard steps in the Seurat workflow for visualization and clustering
pbmc <- RunPCA(pbmc, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE)
DimPlot(pbmc, label = TRUE) + NoLegend()

Signac

Now, make sure you have the Signac package installed.

devtools::install_github("mathewchamberlain/Signac")
library(Signac)

Generate Signac labels for the Seurat object. Note: * This is the main function for Signac. * Optionally, you can do parallel computing by setting num.cores > 1 in the Signac function. * Run time is ~minutes for ~10,000 cells.

# Run Signac
data("training_HPCA")
labels <- Signac(pbmc, R = training_HPCA) # optionally do parallel computing by setting num.cores > 1
celltypes = Generate_lbls(labels, E = pbmc)

Visualizations

Now we can visualize the cell type classifications at many different levels: Immune and nonimmune

pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$Immune, col.name = "immmune")
pbmc <- Seurat::SetIdent(pbmc, value='immmune')
DimPlot(pbmc)

pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$L2, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellTypes)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellStates)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellStates_novel)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

Add protein expression information

pbmc[["ADT"]] <- CreateAssayObject(counts = E$`Antibody Capture`[,colnames(E$`Antibody Capture`) %in% colnames(pbmc)])
## Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
pbmc <- NormalizeData(pbmc, assay = "ADT", normalization.method = "CLR")
pbmc <- ScaleData(pbmc, assay = "ADT")

Visualize protein information on the RNA clustering result

FeaturePlot(pbmc, features = c("CD8a-TotalSeqB", "CD8A"), ncol = 2)
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay
## instead

RidgePlot(pbmc, features = c("CD8a-TotalSeqB"))
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay
## instead

Identify differentially expressed proteins between clusters

# Downsample the clusters to a maximum of 500 cells each (makes the heatmap easier to see for
# small clusters)
pbmc.small <- subset(pbmc, downsample = 500)

# Find protein markers for all clusters, and draw a heatmap
adt.markers <- FindAllMarkers(pbmc.small, assay = "ADT", only.pos = TRUE, verbose = F)
DoHeatmap(pbmc.small, features = unique(adt.markers$gene), assay = "ADT", angle = 90) + NoLegend()

DefaultAssay(pbmc) <- "ADT"

Cluster the data based on protein expression information.

pbmc <- FindVariableFeatures(pbmc, selection.method = "vst")
pbmc <- ScaleData(pbmc)
pbmc <- RunPCA(pbmc, verbose = F)
## Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large a
## percentage of total singular values, use a standard svd instead.
## Warning in irlba(A = t(x = object), nv = npcs, ...): did not converge--results might be
## invalid!; try increasing work or maxit
pbmc <- FindNeighbors(pbmc)
pbmc <- FindClusters(pbmc, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7865
## Number of edges: 260140
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8849
## Number of communities: 15
## Elapsed time: 0 seconds
pbmc <- RunUMAP(pbmc, dims = 1:10)

Visualize Signac cell types on protein clusters

lbls = factor(celltypes$CellTypes)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

lbls = factor(celltypes$CellStates)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)

FeaturePlot(pbmc, features = c("CD8a-TotalSeqB", "CD8A"))
## Warning: Found the following features in more than one assay, excluding the default. We
## will not include these in the final dataframe: CD8A
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The following
## requested variables were not found: CD8A

Session information

## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS: /usr/lib64/libblas.so.3.4.2
## LAPACK: /site/ne/app/x86_64/R/v3.5.0/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bzinb_1.0.4        Signac_2.0.7       pscl_1.5.5         MASS_7.3-51.1     
##  [5] forcats_0.5.0      stringr_1.4.0      dplyr_0.8.5        purrr_0.3.3       
##  [9] readr_1.3.1        tidyr_1.1.1        tibble_3.0.3       tidyverse_1.3.0   
## [13] sctransform_0.2.1  Seurat_3.2.0       ggplot2_3.3.2      veni_1.0.3        
## [17] Matrix.utils_0.9.8 Matrix_1.2-18      biomaRt_2.38.0    
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.1        bit64_0.9-7           knitr_1.28           
##   [4] irlba_2.3.3           data.table_1.13.0     rpart_4.1-15         
##   [7] RCurl_1.98-1.1        generics_0.0.2        BiocGenerics_0.28.0  
##  [10] cowplot_1.0.0         RSQLite_2.2.0         RANN_2.6.1           
##  [13] europepmc_0.4         future_1.16.0         bit_1.1-15.2         
##  [16] enrichplot_1.2.0      spatstat.data_1.4-3   xml2_1.3.1           
##  [19] lubridate_1.7.8       httpuv_1.5.2          assertthat_0.2.1     
##  [22] viridis_0.5.1         xfun_0.12             hms_0.5.3            
##  [25] evaluate_0.14         promises_1.1.0        fansi_0.4.1          
##  [28] progress_1.2.2        caTools_1.17.1.1      dbplyr_1.4.4         
##  [31] readxl_1.3.1          igraph_1.2.5          DBI_1.1.0            
##  [34] htmlwidgets_1.5.1     stats4_3.5.0          ellipsis_0.3.0       
##  [37] RSpectra_0.16-0       backports_1.1.6       deldir_0.1-28        
##  [40] vctrs_0.3.2           Biobase_2.42.0        ROCR_1.0-7           
##  [43] abind_1.4-5           withr_2.1.2           ggforce_0.3.2        
##  [46] packrat_0.5.0         grr_0.9.5             triebeard_0.3.0      
##  [49] checkmate_2.0.0       aplpack_1.3.3         prettyunits_1.1.1    
##  [52] goftest_1.2-2         cluster_2.1.0         DOSE_3.8.2           
##  [55] ape_5.3               lazyeval_0.2.2        crayon_1.3.4         
##  [58] hdf5r_1.0.1           pkgconfig_2.0.3       labeling_0.3         
##  [61] tweenr_1.0.1          nlme_3.1-145          rlang_0.4.8          
##  [64] globals_0.12.5        RJSONIO_1.3-1.4       lifecycle_0.2.0      
##  [67] miniUI_0.1.1.1        neuralnet_1.44.2      modelr_0.1.8         
##  [70] rsvd_1.0.3            cellranger_1.1.0      tcltk_3.5.0          
##  [73] randomForest_4.6-14   polyclip_1.10-0       lmtest_0.9-37        
##  [76] graph_1.60.0          urltools_1.7.3        Rhdf5lib_1.4.3       
##  [79] boot_1.3-24           zoo_1.8-4             reprex_0.3.0         
##  [82] ggridges_0.5.2        png_0.1-7             viridisLite_0.3.0    
##  [85] bitops_1.0-6          KernSmooth_2.23-16    blob_1.2.1           
##  [88] qvalue_2.14.1         gridGraphics_0.5-0    S4Vectors_0.20.1     
##  [91] reactome.db_1.66.0    scales_1.1.0          memoise_1.1.0        
##  [94] graphite_1.28.2       magrittr_1.5          plyr_1.8.6           
##  [97] ica_1.0-2             gplots_3.0.3          gdata_2.18.0         
## [100] compiler_3.5.0        lsei_1.2-0            RColorBrewer_1.1-2   
## [103] lme4_1.1-23           fitdistrplus_1.0-14   cli_2.0.2            
## [106] listenv_0.8.0         patchwork_1.0.1       pbapply_1.4-2        
## [109] mgcv_1.8-31           tidyselect_1.1.0      stringi_1.4.6        
## [112] yaml_2.2.1            GOSemSim_2.8.0        ggrepel_0.8.2        
## [115] grid_3.5.0            fastmatch_1.1-0       tools_3.5.0          
## [118] future.apply_1.4.0    parallel_3.5.0        rstudioapi_0.11      
## [121] gridExtra_2.3         farver_2.0.3          Rtsne_0.15           
## [124] ggraph_2.0.3          digest_0.6.18         rvcheck_0.1.8        
## [127] BiocManager_1.30.10   shiny_1.4.0.2         Rcpp_1.0.4.6         
## [130] broom_0.7.0           later_1.0.0           RcppAnnoy_0.0.16     
## [133] httr_1.4.1            AnnotationDbi_1.44.0  npsurv_0.4-0         
## [136] colorspace_1.4-0      rvest_0.3.5           XML_3.99-0.3         
## [139] fs_1.4.1              tensor_1.5            reticulate_1.16-9001 
## [142] IRanges_2.16.0        splines_3.5.0         uwot_0.1.8           
## [145] statmod_1.4.34        spatstat.utils_1.17-0 graphlayouts_0.6.0   
## [148] ggplotify_0.0.5       plotly_4.9.2.1        xtable_1.8-4         
## [151] jsonlite_1.6.1        nloptr_1.2.2.1        spatstat_1.64-1      
## [154] tidygraph_1.2.0       UpSetR_1.4.0          glasso_1.11          
## [157] R6_2.4.1              pillar_1.4.3          htmltools_0.4.0      
## [160] mime_0.9              glue_1.4.0            fastmap_1.0.1        
## [163] minqa_1.2.4           BiocParallel_1.16.6   class_7.3-16         
## [166] codetools_0.2-16      fgsea_1.8.0           lattice_0.20-41      
## [169] leiden_0.3.3          gtools_3.8.1          ReactomePA_1.26.0    
## [172] GO.db_3.7.0           limma_3.38.3          survival_3.2-3       
## [175] rmarkdown_2.1         munsell_0.5.0         e1071_1.7-3          
## [178] DO.db_2.9             rhdf5_2.27.13         haven_2.3.1          
## [181] reshape2_1.4.4        gtable_0.3.0

A work by Mathew Chamberlain

mathew.chamberlain@sanofi.com